{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "20a03f4a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import datetime\n",
    "from datetime import date, timedelta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "bd72ce5b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>date_positive</th>\n",
       "      <th>date_dose1</th>\n",
       "      <th>date_dose2</th>\n",
       "      <th>vaxtype</th>\n",
       "      <th>state</th>\n",
       "      <th>age</th>\n",
       "      <th>male</th>\n",
       "      <th>bid</th>\n",
       "      <th>malaysian</th>\n",
       "      <th>comorb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2020-03-17</td>\n",
       "      <td>2020-03-12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Johor</td>\n",
       "      <td>34</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2020-03-17</td>\n",
       "      <td>2020-03-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Sarawak</td>\n",
       "      <td>60</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2020-03-20</td>\n",
       "      <td>2020-03-11</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Sabah</td>\n",
       "      <td>58</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2020-03-21</td>\n",
       "      <td>2020-03-17</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Kelantan</td>\n",
       "      <td>69</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2020-03-21</td>\n",
       "      <td>2020-03-13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Melaka</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2020-03-21</td>\n",
       "      <td>2020-03-21</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Sarawak</td>\n",
       "      <td>39</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2020-03-21</td>\n",
       "      <td>2020-03-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>W.P. Kuala Lumpur</td>\n",
       "      <td>57</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2020-03-22</td>\n",
       "      <td>2020-03-18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Perlis</td>\n",
       "      <td>48</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2020-03-22</td>\n",
       "      <td>2020-03-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Pulau Pinang</td>\n",
       "      <td>73</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2020-03-22</td>\n",
       "      <td>2020-03-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Sarawak</td>\n",
       "      <td>80</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         date date_positive date_dose1 date_dose2 vaxtype              state  \\\n",
       "0  2020-03-17    2020-03-12        NaN        NaN     NaN              Johor   \n",
       "1  2020-03-17    2020-03-14        NaN        NaN     NaN            Sarawak   \n",
       "2  2020-03-20    2020-03-11        NaN        NaN     NaN              Sabah   \n",
       "3  2020-03-21    2020-03-17        NaN        NaN     NaN           Kelantan   \n",
       "4  2020-03-21    2020-03-13        NaN        NaN     NaN             Melaka   \n",
       "5  2020-03-21    2020-03-21        NaN        NaN     NaN            Sarawak   \n",
       "6  2020-03-21    2020-03-14        NaN        NaN     NaN  W.P. Kuala Lumpur   \n",
       "7  2020-03-22    2020-03-18        NaN        NaN     NaN             Perlis   \n",
       "8  2020-03-22    2020-03-14        NaN        NaN     NaN       Pulau Pinang   \n",
       "9  2020-03-22    2020-03-20        NaN        NaN     NaN            Sarawak   \n",
       "\n",
       "  age male bid malaysian comorb  \n",
       "0  34    1   0         1      1  \n",
       "1  60    1   0         1      1  \n",
       "2  58    1   0         1      1  \n",
       "3  69    1   0         1      1  \n",
       "4  50    1   0         1      1  \n",
       "5  39    0   0         1      1  \n",
       "6  57    1   0         1      1  \n",
       "7  48    1   0         1      1  \n",
       "8  73    1   0         1      1  \n",
       "9  80    0   1         1      1  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url_linelist = 'https://raw.githubusercontent.com/MoH-Malaysia/covid19-public/main/epidemic/linelist/linelist_deaths.csv'\n",
    "df = pd.read_csv(url_linelist, dtype=str)\n",
    "df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "6b38da72",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>date_positive</th>\n",
       "      <th>date_dose1</th>\n",
       "      <th>date_dose2</th>\n",
       "      <th>vaxtype</th>\n",
       "      <th>state</th>\n",
       "      <th>age</th>\n",
       "      <th>male</th>\n",
       "      <th>bid</th>\n",
       "      <th>malaysian</th>\n",
       "      <th>comorb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2020-03-17</td>\n",
       "      <td>2020-03-12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-09-19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Johor</td>\n",
       "      <td>34</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2020-03-17</td>\n",
       "      <td>2020-03-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-09-19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Sarawak</td>\n",
       "      <td>60</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2020-03-20</td>\n",
       "      <td>2020-03-11</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-09-19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Sabah</td>\n",
       "      <td>58</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2020-03-21</td>\n",
       "      <td>2020-03-17</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-09-19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Kelantan</td>\n",
       "      <td>69</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2020-03-21</td>\n",
       "      <td>2020-03-13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-09-19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Melaka</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2020-03-21</td>\n",
       "      <td>2020-03-21</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-09-19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Sarawak</td>\n",
       "      <td>39</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2020-03-21</td>\n",
       "      <td>2020-03-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-09-19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>W.P. Kuala Lumpur</td>\n",
       "      <td>57</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2020-03-22</td>\n",
       "      <td>2020-03-18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-09-19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Perlis</td>\n",
       "      <td>48</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2020-03-22</td>\n",
       "      <td>2020-03-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-09-19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Pulau Pinang</td>\n",
       "      <td>73</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2020-03-22</td>\n",
       "      <td>2020-03-20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2021-09-19</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Sarawak</td>\n",
       "      <td>80</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         date date_positive date_dose1  date_dose2 vaxtype              state  \\\n",
       "0  2020-03-17    2020-03-12        NaN  2021-09-19     NaN              Johor   \n",
       "1  2020-03-17    2020-03-14        NaN  2021-09-19     NaN            Sarawak   \n",
       "2  2020-03-20    2020-03-11        NaN  2021-09-19     NaN              Sabah   \n",
       "3  2020-03-21    2020-03-17        NaN  2021-09-19     NaN           Kelantan   \n",
       "4  2020-03-21    2020-03-13        NaN  2021-09-19     NaN             Melaka   \n",
       "5  2020-03-21    2020-03-21        NaN  2021-09-19     NaN            Sarawak   \n",
       "6  2020-03-21    2020-03-14        NaN  2021-09-19     NaN  W.P. Kuala Lumpur   \n",
       "7  2020-03-22    2020-03-18        NaN  2021-09-19     NaN             Perlis   \n",
       "8  2020-03-22    2020-03-14        NaN  2021-09-19     NaN       Pulau Pinang   \n",
       "9  2020-03-22    2020-03-20        NaN  2021-09-19     NaN            Sarawak   \n",
       "\n",
       "  age male bid malaysian comorb  \n",
       "0  34    1   0         1      1  \n",
       "1  60    1   0         1      1  \n",
       "2  58    1   0         1      1  \n",
       "3  69    1   0         1      1  \n",
       "4  50    1   0         1      1  \n",
       "5  39    0   0         1      1  \n",
       "6  57    1   0         1      1  \n",
       "7  48    1   0         1      1  \n",
       "8  73    1   0         1      1  \n",
       "9  80    0   1         1      1  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# wrangle relevant date columns\n",
    "df.date = pd.to_datetime(df.date, errors='coerce').dt.date\n",
    "df.date_positive = pd.to_datetime(df.date_positive, errors='coerce').dt.date\n",
    "assert len(df[df.date.isnull()]) == len(df[df.date_positive.isnull()]) == 0\n",
    "df.date_dose2 = pd.to_datetime(df.date_dose2, errors='coerce').dt.date\n",
    "\n",
    "# fill blank dates for computation of full vax \n",
    "df.date_dose2 = df.date_dose2.fillna(date.today()+timedelta(1)) \n",
    "df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "c1a56478",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>date_positive</th>\n",
       "      <th>date_dose1</th>\n",
       "      <th>date_dose2</th>\n",
       "      <th>vaxtype</th>\n",
       "      <th>state</th>\n",
       "      <th>age</th>\n",
       "      <th>male</th>\n",
       "      <th>bid</th>\n",
       "      <th>malaysian</th>\n",
       "      <th>comorb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>22734</th>\n",
       "      <td>2021-09-17</td>\n",
       "      <td>2021-09-01</td>\n",
       "      <td>2021-06-15</td>\n",
       "      <td>2021-07-06</td>\n",
       "      <td>Pfizer</td>\n",
       "      <td>Johor</td>\n",
       "      <td>68</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22735</th>\n",
       "      <td>2021-09-17</td>\n",
       "      <td>2021-09-05</td>\n",
       "      <td>2021-06-16</td>\n",
       "      <td>2021-07-07</td>\n",
       "      <td>Pfizer</td>\n",
       "      <td>Johor</td>\n",
       "      <td>67</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22741</th>\n",
       "      <td>2021-09-17</td>\n",
       "      <td>2021-09-10</td>\n",
       "      <td>2021-06-20</td>\n",
       "      <td>2021-07-11</td>\n",
       "      <td>Sinovac</td>\n",
       "      <td>Terengganu</td>\n",
       "      <td>77</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22742</th>\n",
       "      <td>2021-09-17</td>\n",
       "      <td>2021-09-12</td>\n",
       "      <td>2021-06-03</td>\n",
       "      <td>2021-06-24</td>\n",
       "      <td>Sinovac</td>\n",
       "      <td>Terengganu</td>\n",
       "      <td>47</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             date date_positive  date_dose1  date_dose2  vaxtype       state  \\\n",
       "22734  2021-09-17    2021-09-01  2021-06-15  2021-07-06   Pfizer       Johor   \n",
       "22735  2021-09-17    2021-09-05  2021-06-16  2021-07-07   Pfizer       Johor   \n",
       "22741  2021-09-17    2021-09-10  2021-06-20  2021-07-11  Sinovac  Terengganu   \n",
       "22742  2021-09-17    2021-09-12  2021-06-03  2021-06-24  Sinovac  Terengganu   \n",
       "\n",
       "      age male bid malaysian comorb  \n",
       "22734  68    1   0         1      1  \n",
       "22735  67    1   0         1      1  \n",
       "22741  77    0   0         1      1  \n",
       "22742  47    0   0         1      1  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# keep only those who were infected at least 14 days after dose 2\n",
    "fvax = df[(df.date_positive - df.date_dose2).dt.days > 13].copy() \n",
    "fvax = fvax[fvax.date == date(2021,9,17)]\n",
    "fvax.head(10)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
